For this Seurat analysis, we used the following parameters:

pars <- as.data.frame(do.call("rbind", params))
colnames(pars) <- "parameters_used"
pars

Starting with this Seurat object from Mireille.

path <- "/fast/work/groups/cubi/milekm/mireille/de/rerun/downstream_analysis/"
sobj <- readRDS(file.path(path,params$path_to_object))
selected_cell_types <- sobj[[]] %>% 
  pull(predicted.id) %>% 
  unique()

# sample <- sobj[[]] %>%
#   select(group,source_name) %>%
#   mutate(sample=paste0(group,"_",source_name)) %>%
#   select(sample)

# sobj <- AddMetaData(sobj, metadata = sample)

# cluster_metadata_3 <- sobj[[]] %>%
#     mutate(age = str_extract(group, "^[0-9]+w")) %>%
#     mutate(strain = str_extract(group, "w(E|SPF|)")) %>% 
#     mutate(time = str_extract(group, "[0-9]+days")) %>%
#     mutate(arm = paste(age, strain, time, sep = "_")) %>%
#     group_by(arm) %>%
#     mutate(pseudoID = sprintf("ID%02d", as.numeric(as.factor(mouse)))) %>% 
#   select(sample, cell_type, group, mouse, age, location, time, strain,pseudoID) %>% 
#   distinct()

expr <- list()

for (cell_type in selected_cell_types) {
 for (sample in unique(sobj[[]]$sample)) {
   # sample <- unique(sobj[[]]$sample)[1]
   # cell_type <- selected_cell_types[1]
   cells <- Cells(sobj)[(sobj@meta.data$sample==sample & sobj$predicted.id==cell_type )  ] 
   cells <- cells[!is.na(cells)]
   if (length(cells) > 1) {
     expr[[paste0(cell_type,';',sample)]] <- rowSums(sobj@assays$RNA@counts[,cells])
   }
 }
}

for (sample in unique(sobj[[]]$sample)) {
 cells <- Cells(sobj)[(sobj@meta.data$sample==sample )]
 cells <- cells[!is.na(cells)]
 expr[[paste0('all;',sample)]] <- rowSums(sobj@assays$RNA@counts[,cells])
}


sample_name <- sub(".*-","",sub(";.*","", sub(';','-',names(expr))))

colData<-data.frame(sample=names(expr),
               cell_type=factor(sub(';.*','',names(expr))),
               sample_name=sample_name,
               group=sub(".*-","",sub("_","-",sub("_","-",sample_name))),
               mouse=gsub("-","_",sub("_.*","",sub("_","-",sample_name))) ,
               age=sub("_.*","",sub(".*-","",sub("_","-",sub("_","-",sample_name)))),
               compartment=sub(".*_", "", sub("_[fpd].*", "", sample_name)),
               time = sub(".*_", "", sub("days.*", "days", sample_name)),
               strain = ifelse(grepl("12w",sample_name), "young",
                               ifelse(grepl("26w_SPF", sample_name), "aged",
                                      ifelse(grepl("26w_nonSPF", sample_name),"immuno", NA))))

colData <- colData %>%
  mutate(age_time_strain = paste0(age, "_", time, "_", strain)) %>%
  group_by(age_time_strain) %>%
  mutate(pseudoID = sprintf("ID%02d", as.numeric(as.factor(mouse))))%>%
  ungroup()

counts <- do.call(cbind, expr)

Get number of reads per sample.

lengths <- colSums(counts)
df_lengths <- data.frame(sample = names(lengths),
                         n_reads = lengths) %>%
  mutate(group = sub(".*-","",sub("_","-",sub("_","-",sub(".*;", "",sample)))),
         celltype = sub(";.*", "", sample))  

ggplot(df_lengths, aes(celltype,n_reads,fill=group))+
  geom_boxplot(position=position_dodge(width=.5),width=.5, outlier.shape = 1, outlier.colour = "red")+
  geom_point(position=position_dodge(width=.5),colour='grey30', shape=1, size=2) +
  facet_wrap(~celltype, scales="free")

We run DESeq2 by including the pseudoID in the model. Here we assume that the age_time_location of the mouse is replicated in triplicate (pseudoID). We use all cells, there is not enough cells to look at subtypes.

# selected_cell_types <- c("Monocytes / Macrophages", "Neutrophils", "Myeloid Precursor", "T Cells", "B Cells", "Dendritic Cells", "all")
selected_cell_types <- c("all")
# selected_contrasts <- c("12wfx_5days_vs_12wfx_2days",
#                         "26wEfx_5days_vs_26wEfx_2days",
#                         "26wSPFfx_5days_vs_26wSPFfx_2days",
#                         "26wEfx_2days_vs_12wfx_2days",
#                         "26wSPFfx_2days_vs_12wfx_2days",
#                         "26wSPFfx_2days_vs_26wEfx_2days",
#                         "12wdist_5days_vs_12wfx_5days",
#                         "12wprox_5days_vs_12wfx_5days",
#                         "12wdist_5days_vs_12wprox_5days",
#                         "26wEdist_5days_vs_26wEfx_5days",
#                         "26wEprox_5days_vs_26wEfx_5days",
#                         "26wEdist_5days_vs_26wEprox_5days",
#                         "26wSPFdist_5days_vs_26wSPFfx_5days",
#                         "26wSPFprox_5days_vs_26wSPFfx_5days",
#                         "26wSPFdist_5days_vs_26wSPFprox_5days")

selected_contrasts <- c("12w_nonSPF_5days_fx_vs_12w_nonSPF_2days_fx",
                        "26w_nonSPF_5days_fx_vs_26w_nonSPF_2days_fx",
                        "26w_SPF_5days_fx_vs_26w_SPF_2days_fx",
                        "26w_nonSPF_2days_fx_vs_12w_nonSPF_2days_fx",
                        "26w_SPF_2days_fx_vs_12w_nonSPF_2days_fx",
                        "26w_SPF_2days_fx_vs_26w_nonSPF_2days_fx",
                        "12w_nonSPF_5days_distal_vs_12w_nonSPF_5days_fx",
                        "12w_nonSPF_5days_proximal_vs_12w_nonSPF_5days_fx",
                        "12w_nonSPF_5days_distal_vs_12w_nonSPF_5days_proximal",
                        "26w_nonSPF_5days_distal_vs_26w_nonSPF_5days_fx",
                        "26w_nonSPF_5days_proximal_vs_26w_nonSPF_5days_fx",
                        "26w_nonSPF_5days_distal_vs_26w_nonSPF_5days_proximal",
                        "26w_SPF_5days_distal_vs_26w_SPF_5days_fx",
                        "26w_SPF_5days_proximal_vs_26w_SPF_5days_fx",
                        "26w_SPF_5days_distal_vs_26w_SPF_5days_proximal")
                        
      


# all_groups <- colData$group %>%
#   unique()
# contrasts <- expand_grid(perturbation=all_groups, reference=all_groups) %>%
#       filter(perturbation < reference) %>%
#       mutate(contrast = paste0(perturbation,"_vs_",reference)) %>%
#       pull(contrast)

res <- list()

for (celltype in selected_cell_types){
  
  coldata_df <- colData %>%
    dplyr::filter(cell_type %in% celltype)
  counts_df <- subset(counts, select = coldata_df$sample)
  if (sum(colSums(counts_df)) != 0){
    dds <- DESeqDataSetFromMatrix(countData=as.matrix(counts_df),
                                  colData=coldata_df,
                                  design=~pseudoID + group)
    contrasts <- selected_contrasts
    sub_res <- list()
    
    for (comparison in contrasts){
      perturbation <- sub("_vs_.*", "", comparison)
      reference <- sub(".*_vs_", "", comparison)
      dds$group <- relevel(dds$group, ref = reference)
      
      if (celltype != "all" & celltype != "all_cells"){
        dds <- estimateSizeFactors(dds, type='poscounts')
        dds <- DESeq(dds, test = 'LRT', reduced = ~1, useT=TRUE, minmu=1e-6, minReplicatesForReplace=Inf)
      } else {
        dds <- estimateSizeFactors(dds)
        dds <- DESeq(dds)
      }
      
      if(params$shrinkage_type=="apeglm"){
        sub_res[[comparison]] <- lfcShrink(dds, coef=paste0("group_",comparison), type=params$shrinkage_type, format="DataFrame") %>%
          as.data.frame() %>%
          tibble::rownames_to_column('gene_name') %>%
          mutate(cell_type=celltype, contrast = comparison)
      } else {
        
        sub_res[[comparison]] <- lfcShrink(dds, contrast=c("group",perturbation, reference), type=params$shrinkage_type, format="DataFrame") %>%
          as.data.frame() %>%
          tibble::rownames_to_column('gene_name') %>%
          mutate(cell_type=celltype, contrast = comparison)
      }
    }
    
  }
  res[[celltype]] <- do.call("rbind", sub_res) %>%
    tibble::remove_rownames()
}

res_df <- do.call("rbind", res) %>%
  tibble::remove_rownames()
for (comparison in unique(res_df$contrast)){
  p <- res_df %>%                  
    dplyr::filter(!is.na(padj), contrast == comparison) %>%
    ggplot(aes(x=log2FoldChange,y=-log10(pvalue),color=padj < .05)) + 
    geom_point(size=.5, shape=1) +
    geom_label_repel(data=res_df %>%
                       dplyr::filter(contrast == comparison,padj < .05),
                     aes(label=gene_name),
                     segment.size=.25,segment.alpha=.5,size=3,color='black',max.overlaps=40,
                     label.padding=0.05,label.size=NA) +
    scale_color_manual(values=c('black','red')) +
    theme_classic() +
    theme(legend.position='none',
          strip.background=element_blank())+
    ggtitle(comparison)+
    facet_wrap(~cell_type, ncol = 2, scales = "free")
  print(p)
  
}

# for (comparison in unique(res_df$contrast)){
#   p <- res_df %>%
#     dplyr::filter(contrast == comparison) %>%
#     ggplot(aes(x=baseMean,y=log2FoldChange,color=padj < .05)) +
#     geom_point(size=.5, shape=1) +
#     geom_label_repel(data=res_df %>%
#                        dplyr::filter(contrast == comparison, padj < .05),
#                      aes(label=gene_name),
#                      segment.size=.25,segment.alpha=.5,size=3,color='black',max.overlaps=30,
#                      label.padding=0.05,label.size=NA) +
#     scale_color_manual(values=c('black','red')) +
#     scale_x_continuous(trans='log10') +
#     coord_cartesian(ylim =c(-4,4)) +
#     theme_classic() +
#     theme(legend.position='none',
#           strip.background=element_blank())+
#     ggtitle(comparison)+
#     facet_wrap(~cell_type, ncol = 2)
#   print(p)
# }
res_df %>%
  dplyr::arrange(padj) %>%
  dplyr::filter(!is.na(padj) & (padj < .05)) %>%
  dplyr::mutate_if(is.numeric, signif, 2) %>%
  DT::datatable(rownames=FALSE, extensions = 'Buttons', options = list(dom='frtBip', buttons = c('csv')))
dir.create("results_list_310124", recursive=T, showWarnings=T)
write.table(res_df,"results_list_310124/de_results_pseudoid_analysis.tsv", quote=F, sep="\t",row.names = F)
library(tmod)
# data(tmod)
# 
# library(orthomapper)
# mousehum <- orthologs(10090, 9606)
# mousehum$h_symbol <- entrez_annotate(mousehum$T.9606)$SYMBOL
# mousehum$m_symbol <- entrez_annotate(mousehum$T.10090)$SYMBOL
# 
# mousehum <- mousehum[order(mousehum$h_symbol),]
# rows_tmod <- which(mousehum$h_symbol %in% tmod$gv)
# mgenes <- mousehum$m_symbol[rows_tmod]
# 
# list_tmod <- sapply(tmod$gs2gv, function(x) tmod$gv[x] )
# list_tmod <- sapply(list_tmod, function(x) x[order(x)])
# list_tmod <- sapply(list_tmod, function(x) x[which(x %in%  mousehum$h_symbol)])
# list_tmod <- sapply(list_tmod, function(x) mousehum[which(mousehum$h_symbol %in% x), "m_symbol"])
# list_tmod <- sapply(list_tmod, function(x) which(mgenes %in% x))
# 
# tmod_mm <- tmod
# 
# tmod_mm$gv <- mgenes
# tmod_mm$gs2gv <- list_tmod

# save tmod mouse object
# saveRDS(tmod_mm, "tmod_mm.rds")
tmod_mm <- readRDS("tmod_mm.rds")

df2tmod <- function(df, gene_id_col=ncol(df), module_id_col=1, module_title_col=2) {

  require(tmod, quietly=TRUE)

  gene_ids <- df[[gene_id_col]]
  module_ids <- df[[module_id_col]]
  m2g <- tapply(gene_ids, module_ids, list)
  df[[gene_id_col]] <- NULL
  df <- df[!duplicated(df[[module_id_col]]), ]
  colnames(df)[module_id_col] <- "ID"
  colnames(df)[module_title_col] <- "Title"

  makeTmod(modules=df, modules2genes=m2g)
}

df <- as.data.frame(msigdbr::msigdbr(species='Mus musculus'))
df <- df[ , c("gs_name", "gs_id", "gs_cat", "gs_subcat", "gene_symbol") ]
colnames(df) <- c("Title", "ID", "Category", "Subcategory", "GeneID")
msig <- df2tmod(df[!is.na(df$GeneID),], gene_id_col=ncol(df), module_id_col=2, module_title_col=1)
sel <- (msig$gs$Category %in% c("H"))  | (msig$gs$Subcategory %in% c('CP:REACTOME','GO:BP','CP:KEGG'))
res <- split(res_df, f = res_df$contrast)

genes <- lapply(res, function(x) x %>%
                  filter(!is.na(padj)) %>%
                  arrange(pvalue))
lgenes <- lapply(genes, function(x) x$gene_name)

res.tmod <- lapply(lgenes, tmodCERNOtest, mset = msig[sel])

res.tmod.filtered <- lapply(res.tmod, function(x) subset(x, adj.P.Val < 1e-8 & AUC > .7))

pie <- lapply(genes, function(x) tmodDecideTests(x$gene_name, lfc = x$log2FoldChange,
                                                 pval = x$pvalue, lfc.thr=0,pval.thr=0.1, mset = msig[sel]))
pie <- lapply(pie, as.data.frame)

tmodPanelPlot(res.tmod.filtered, text.cex = .8, pie = pie, pie.style="rug", grid="at")

res <- split(res_df, f = res_df$contrast)

genes <- lapply(res, function(x) x %>%
                  filter(!is.na(padj)) %>%
                  arrange(pvalue))
lgenes <- lapply(genes, function(x) x$gene_name)

res.tmod <- lapply(lgenes, tmodCERNOtest, mset = tmod_mm)

res.tmod.filtered <- lapply(res.tmod, function(x) subset(x, adj.P.Val < 1e-6 & AUC > .7))

pie <- lapply(genes, function(x) tmodDecideTests(x$gene_name, lfc = x$log2FoldChange,
                                                 pval = x$pvalue, lfc.thr=0,pval.thr=0.1, mset = tmod_mm))
pie <- lapply(pie, as.data.frame)

tmodPanelPlot(res.tmod.filtered, text.cex = .8, pie = pie, pie.style="rug", grid="at")

sessionInfo()
## R version 4.3.1 (2023-06-16)
## Platform: x86_64-conda-linux-gnu (64-bit)
## Running under: Rocky Linux 8.7 (Green Obsidian)
## 
## Matrix products: default
## BLAS/LAPACK: /data/cephfs-1/work/groups/cubi/users/milekm_c/miniconda/envs/R-fixed-biomart/lib/libopenblasp-r0.3.24.so;  LAPACK version 3.11.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## time zone: Europe/Berlin
## tzcode source: system (glibc)
## 
## attached base packages:
## [1] stats4    stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] tmod_0.50.13                stringr_1.5.0              
##  [3] ggrepel_0.9.3               DESeq2_1.40.2              
##  [5] SummarizedExperiment_1.30.2 Biobase_2.60.0             
##  [7] MatrixGenerics_1.12.2       matrixStats_1.0.0          
##  [9] GenomicRanges_1.52.0        GenomeInfoDb_1.36.1        
## [11] IRanges_2.34.1              S4Vectors_0.38.1           
## [13] BiocGenerics_0.46.0         cowplot_1.1.1              
## [15] ggplot2_3.4.3               gtools_3.9.4               
## [17] tidyr_1.3.0                 dplyr_1.1.3                
## [19] SeuratObject_4.1.4          Seurat_4.4.0               
## 
## loaded via a namespace (and not attached):
##   [1] RColorBrewer_1.1-3      rstudioapi_0.15.0       jsonlite_1.8.7         
##   [4] magrittr_2.0.3          spatstat.utils_3.0-3    farver_2.1.1           
##   [7] rmarkdown_2.25          zlibbioc_1.46.0         vctrs_0.6.3            
##  [10] ROCR_1.0-11             spatstat.explore_3.2-3  RCurl_1.98-1.12        
##  [13] S4Arrays_1.0.4          htmltools_0.5.6         sass_0.4.7             
##  [16] sctransform_0.4.0       parallelly_1.36.0       KernSmooth_2.23-22     
##  [19] bslib_0.5.1             htmlwidgets_1.6.2       ica_1.0-3              
##  [22] plyr_1.8.9              plotly_4.10.2           zoo_1.8-12             
##  [25] cachem_1.0.8            igraph_1.5.1            mime_0.12              
##  [28] lifecycle_1.0.3         pkgconfig_2.0.3         Matrix_1.6-1.1         
##  [31] R6_2.5.1                fastmap_1.1.1           GenomeInfoDbData_1.2.10
##  [34] fitdistrplus_1.1-11     future_1.33.0           shiny_1.7.5            
##  [37] digest_0.6.33           colorspace_2.1-0        patchwork_1.1.3        
##  [40] tensor_1.5              irlba_2.3.5.1           crosstalk_1.2.0        
##  [43] labeling_0.4.3          progressr_0.14.0        fansi_1.0.4            
##  [46] spatstat.sparse_3.0-2   httr_1.4.7              polyclip_1.10-6        
##  [49] abind_1.4-5             compiler_4.3.1          withr_2.5.1            
##  [52] BiocParallel_1.34.2     gplots_3.1.3            MASS_7.3-60            
##  [55] DelayedArray_0.26.6     caTools_1.18.2          tools_4.3.1            
##  [58] lmtest_0.9-40           beeswarm_0.4.0          msigdbr_7.5.1          
##  [61] httpuv_1.6.11           future.apply_1.11.0     goftest_1.2-3          
##  [64] glue_1.6.2              nlme_3.1-163            promises_1.2.1         
##  [67] grid_4.3.1              Rtsne_0.16              cluster_2.1.4          
##  [70] reshape2_1.4.4          generics_0.1.3          gtable_0.3.4           
##  [73] spatstat.data_3.0-1     data.table_1.14.8       XVector_0.40.0         
##  [76] sp_2.1-0                utf8_1.2.3              spatstat.geom_3.2-5    
##  [79] RcppAnnoy_0.0.21        RANN_2.6.1              pillar_1.9.0           
##  [82] babelgene_22.9          later_1.3.1             splines_4.3.1          
##  [85] lattice_0.21-9          survival_3.5-7          deldir_1.0-9           
##  [88] tidyselect_1.2.0        locfit_1.5-9.8          miniUI_0.1.1.1         
##  [91] pbapply_1.7-2           knitr_1.44              gridExtra_2.3          
##  [94] scattermore_1.2         xfun_0.40               pheatmap_1.0.12        
##  [97] DT_0.28                 stringi_1.7.12          tagcloud_0.6           
## [100] lazyeval_0.2.2          yaml_2.3.7              evaluate_0.22          
## [103] codetools_0.2-19        tibble_3.2.1            cli_3.6.1              
## [106] uwot_0.1.16             xtable_1.8-4            reticulate_1.32.0      
## [109] munsell_0.5.0           jquerylib_0.1.4         Rcpp_1.0.11            
## [112] globals_0.16.2          spatstat.random_3.1-6   png_0.1-8              
## [115] XML_3.99-0.14           parallel_4.3.1          ellipsis_0.3.2         
## [118] bitops_1.0-7            listenv_0.9.0           plotwidgets_0.5.1      
## [121] viridisLite_0.4.2       scales_1.2.1            ggridges_0.5.4         
## [124] crayon_1.5.2            leiden_0.4.3            purrr_1.0.2            
## [127] rlang_1.1.1